#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2020/6/6 11:25
# @Author  : ystraw
# @Site    : 
# @File    : xinlang.py
# @Software: PyCharm Community Edition
# @function: Fetch headlines from a Sina news JSONP feed and save the titles to ./xinlang.txt

import requests
from bs4 import BeautifulSoup
import bs4
import json
from openpyxl import Workbook
import time
import datetime

# Write rows to an Excel workbook
def write_excel_xls(path, value, bHead):
    """Create a workbook holding a header row plus data rows and save it.

    Args:
        path: Destination file path passed to ``Workbook.save``.
        value: Sequence of rows; each row is an indexable sequence of cell
            values.
        bHead: Header cells, appended as the first worksheet row.

    A cell that cannot be written is reported on stdout and skipped, so one
    bad value does not prevent the rest of the sheet from being saved.
    """
    workbook = Workbook()
    # Use the worksheet that a new workbook activates by default.
    sheet = workbook.active
    sheet.append(bHead)
    # Data rows start at worksheet row 2, directly below the header.
    for row_no, record in enumerate(value, start=2):
        for col_no in range(1, len(record) + 1):
            try:
                sheet.cell(row=row_no, column=col_no, value=record[col_no - 1])
            except Exception as ex:
                print('i，j=', row_no, col_no, ' 单元格写入出错！', ex)
    workbook.save(path)
    print(path + '表格写入数据成功！')

def _parse_jsonp(text):
    """Return the JSON object embedded in a JSONP response body.

    The payload is taken from the first ``{`` to the last ``}``, so the
    result does not depend on the callback name or on the exact characters
    that follow the closing parenthesis.

    Raises:
        ValueError: if no braces are found or the payload is not valid JSON.
    """
    start = text.index('{')
    end = text.rindex('}') + 1
    # json.loads decodes Unicode escape sequences itself; running the text
    # through the 'unicode_escape' codec first would corrupt raw non-ASCII
    # characters and unescape quotes inside strings, breaking the JSON.
    # strict=False keeps tolerating raw control characters inside strings.
    return json.loads(text[start:end], strict=False)


def getdata(end_time="2020-06-01 0:0:0", max_pages=4,
            output_path='./xinlang.txt'):
    """Fetch headlines from the Sina news feed and save the titles to a file.

    Pages are requested in order starting from page 1. Fetching stops at the
    first item whose ``createtime`` is earlier than ``end_time`` or once
    ``max_pages`` pages have been requested. A page that fails to download
    or parse is reported on stdout and skipped.

    Args:
        end_time: Cutoff in ``%Y-%m-%d %H:%M:%S`` format, interpreted in the
            local time zone. Items created before it end the crawl.
        max_pages: Maximum number of pages to request. The original author
            noted that the feed has roughly 37 pages.
        output_path: Text file that receives the comma-joined titles.

    Returns:
        The collected rows as a list of ``[formatted_time, title]`` lists.
    """
    # Convert the cutoff to a Unix timestamp for comparison with createtime.
    cutoff_dt = datetime.datetime.strptime(end_time, "%Y-%m-%d %H:%M:%S")
    cutoff = int(time.mktime(cutoff_dt.timetuple()))

    url_template = (
        'https://interface.sina.cn/news/get_news_by_channel_new_v2018.d.html'
        '?cat_1=51923&show_num=27&level=1,2&page=%d'
        '&callback=newsloadercallback&_=1591414346860'
    )
    headers = {'user-agent': 'Mozilla/5.0'}

    alldata = []  # rows of [formatted time, title]
    finished = False
    for page in range(1, max_pages + 1):
        if page > 1:
            # Pause between requests only, not after the final one.
            time.sleep(2)
        print('page = ', page)
        try:
            resp = requests.get(url_template % page, headers=headers,
                                timeout=10)
            resp.raise_for_status()
            news_items = _parse_jsonp(resp.text)['result']['data']
            # NOTE(review): stopping at the first old item assumes the feed
            # is ordered newest-first - confirm against the endpoint.
            for news in news_items:
                created = int(news['createtime'])
                if created < cutoff:
                    print('获取结束')
                    finished = True
                    break
                created_text = time.strftime('%Y-%m-%d %H:%M:%S',
                                             time.localtime(created))
                alldata.append([created_text, news['title']])
            print('page = ', page, '已成功获取！')
        except (requests.RequestException, ValueError, KeyError,
                TypeError) as ex:
            # Best effort: report the failure and move on to the next page.
            print('异常：', ex)
        if finished:
            break

    # To export to Excel as well, call:
    #   write_excel_xls('./data.xlsx', alldata, ['时间', '标题'])
    titles = [row[1] for row in alldata]
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(','.join(titles))
    return alldata

# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    getdata()